# This script is for identifying clusters based on Centroid Method
# The method is published here: 
# Please cite this script as: 


# Load packages
library(tidyverse)
library(ggradar)
library(fmsb)
library(gridExtra)
library(patchwork)
library(svglite)
library(ggrepel)
library(readxl)
library(dplyr)
library(tidyr)


# Locate the project folder.
# The script is run on two machines whose Dropbox folders live in different
# places. Candidates are listed in priority order and the first one that
# actually contains the input workbook is used. The MacBook folder comes first
# because it is the location the script effectively used before. If no
# candidate contains the workbook, the first candidate is kept so that the
# read below fails with a recognisable path.
project_dir_candidates <- c(
  "/Users/richardkasa/Dropbox/KUTATÁS/OCAI/R",                      # macbook
  "/Users/kasarichard/Library/CloudStorage/Dropbox/KUTATÁS/OCAI/R"  # Macstudio
)
has_input <- file.exists(file.path(project_dir_candidates, "Input_data.xlsx"))
project_dir <- if (any(has_input)) {
  project_dir_candidates[which(has_input)[1]]
} else {
  project_dir_candidates[1]
}

# Define the file path
file_path <- file.path(project_dir, "Input_data.xlsx")

# Define output directory for saving plots
output_dir <- file.path(project_dir, "plots")

dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)  # Create if it doesn't exist


# Turn one worksheet column into numbers, accepting a decimal comma
# (e.g. "12,5") as well as a decimal point.
parse_decimal <- function(column) {
  as.numeric(gsub(",", ".", as.character(column)))
}

# Load the workbook (read_excel() reads the first sheet by default)
data <- read_excel(file_path)

# Every column except the first is converted to numeric, in case Excel
# delivered the scores as text.
score_columns <- names(data)[-1]
data[score_columns] <- lapply(data[score_columns], parse_decimal)

# Report the IDs of all rows that contain an NA in any column
has_missing <- !complete.cases(data)
missing_rows <- data[has_missing, "ID"]
if (nrow(missing_rows) == 0) {
  cat("No missing values found.\n")
} else {
  cat("Rows with missing values (ID):\n")
  print(missing_rows)
}

# Check that An + Bn + Cn + Dn = 100 and Ap + Bp + Cp + Dp = 100
# The scores are doubles (possibly parsed from text such as "33,3"), so the
# totals are compared with a small tolerance rather than with `==`.
sum_tolerance <- sqrt(.Machine$double.eps)
now_total <- rowSums(data[, c("An", "Bn", "Cn", "Dn")])
pref_total <- rowSums(data[, c("Ap", "Bp", "Cp", "Dp")])
sums_ok <- abs(now_total - 100) < sum_tolerance &
  abs(pref_total - 100) < sum_tolerance

# A row with a missing score has an NA total; `%in% TRUE` turns that NA into
# FALSE so the row is listed with its real ID instead of producing an all-NA
# row in the subset.
invalid_rows <- data[!(sums_ok %in% TRUE), "ID"]

if (nrow(invalid_rows) > 0) {
  cat("Rows where An+Bn+Cn+Dn != 100 or Ap+Bp+Cp+Dp != 100 (ID):\n")
  print(invalid_rows)
} else {
  cat("All rows satisfy the summation constraint.\n")
}

# Project each respondent's four scores onto two axes:
#   x = (B - D) / 2 and y = (A - C) / 2,
# once for the NOW scores (Nx, Ny) and once for the PREF scores (Px, Py).
data <- data %>%
  mutate(
    Nx = (Bn - Dn) / 2,
    Ny = (An - Cn) / 2,
    Px = (Bp - Dp) / 2,
    Py = (Ap - Cp) / 2
  )

# Sample mean of every score column, ignoring missing values
score_means <- vapply(
  data[c("An", "Bn", "Cn", "Dn", "Ap", "Bp", "Cp", "Dp")],
  mean,
  numeric(1),
  na.rm = TRUE
)

# A = clan, B = adhocracy, C = market, D = hierarchy; N = NOW, P = PREF
Nclan <- score_means[["An"]]
Nadhocracy <- score_means[["Bn"]]
Nmarket <- score_means[["Cn"]]
Nhierarchy <- score_means[["Dn"]]
Pclan <- score_means[["Ap"]]
Padhocracy <- score_means[["Bp"]]
Pmarket <- score_means[["Cp"]]
Phierarchy <- score_means[["Dp"]]



# Create a data frame with both NOW and PREFERRED values
# (one row per group, one column per culture type, as ggradar expects)
data_radar <- tibble(
  group = c("NOW", "PREFERRED"),
  clan = c(Nclan, Pclan),
  adhocracy = c(Nadhocracy, Padhocracy),
  market = c(Nmarket, Pmarket),
  hierarchy = c(Nhierarchy, Phierarchy)
)

# Axis range for the radar chart. The default is 0 / 20 / 40. ggradar stops
# with an error when a value exceeds grid.max, so the maximum is widened in
# steps of 10 whenever a sample mean is larger than 40.
radar_values <- unlist(data_radar[, -1])
radar_values <- radar_values[is.finite(radar_values)]
radar_max <- max(c(40, ceiling(radar_values / 10) * 10))
radar_mid <- radar_max / 2

radar_plot <- ggradar(data_radar,
              values.radar = c(0, radar_mid, radar_max),  # Labels printed at min / mid / max grid line
              grid.min = 0, grid.mid = radar_mid, grid.max = radar_max,
              axis.label.size = 5,
              group.colours = c("#669BBC", "#C1121F"),
              group.point.size = 3,
              group.line.width = 1.5,
              background.circle.colour = "#FDF0D5"
              ) +
  theme(legend.position = "bottom") +
  labs(title = "Radar Chart for the Sample") +  # Add title
  theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"))
# Save radar chart
ggsave(filename = file.path(output_dir, "Radar_Plot.png"), plot = radar_plot, width = 7, height = 7, dpi = 300)
ggsave(filename = file.path(output_dir, "Radar_Plot.svg"), plot = radar_plot, width = 7, height = 7, dpi = 300, device = "svg")


# Build one culture-map scatter plot.
#
# The NOW and PREF scatter plots share everything except the plotted columns,
# the point colour, the title and the plot margin, so the common layers live
# in this helper.
#
# plot_data     data frame holding the coordinate columns
# x_col, y_col  names (character) of the columns plotted on x and y
# point_colour  colour of the respondent points
# plot_title    title printed above the plot
# margin_pt     plot margin applied to all four sides
#
# Returns a ggplot object.
culture_scatter <- function(plot_data, x_col, y_col, point_colour, plot_title, margin_pt) {
  ggplot(plot_data, aes(x = .data[[x_col]], y = .data[[y_col]])) +
    # Solid diagonals and dashed axes through the origin
    geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "black") +
    geom_abline(slope = -1, intercept = 0, linetype = "solid", color = "black") +
    geom_abline(slope = 0, intercept = 0, linetype = "dashed", color = "black") +
    geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
    # Culture labels placed in the four corners, rotated along the diagonals
    annotate("text", x = -34, y = 36, label = "Clan", color = "black", size = 5, fontface = "bold", angle = -45) +
    annotate("text", x = 36, y = -34, label = "Market", color = "black", size = 5, fontface = "bold", angle = -45) +
    annotate("text", x = 35, y = 37, label = "Adhocracy", color = "black", size = 5, fontface = "bold", angle = 45) +
    annotate("text", x = -35, y = -33, label = "Hierarchy", color = "black", size = 5, fontface = "bold", angle = 45) +
    geom_point(color = point_colour, size = 3, alpha = 0.7) +  # Place points last to ensure they are on top
    theme_minimal() +
    labs(title = plot_title, x = "", y = "") +
    theme(
      text = element_text(size = 14),
      plot.title = element_text(size = 18, face = "bold", hjust = 0.5),  # Formatting the title
      plot.margin = margin(t = margin_pt, r = margin_pt, b = margin_pt, l = margin_pt),  # Adjust margins
      panel.grid = element_blank()  # Remove grid lines
    ) +
    # Points outside [-40, 40] are dropped by these limits (ggplot2 warns)
    xlim(-40, 40) +
    ylim(-40, 40) +
    coord_fixed(ratio = 1)
}

# Create scatter plot for NOW
now_plot <- culture_scatter(data, "Nx", "Ny",
                            point_colour = "#669BBC",
                            plot_title = "Perceived Organisational Culture",
                            margin_pt = 10)
print(now_plot)
ggsave(filename = file.path(output_dir, "Scatter_Now.png"), plot = now_plot, width = 7, height = 7, dpi = 300)
ggsave(filename = file.path(output_dir, "Scatter_Now.svg"), plot = now_plot, width = 7, height = 7, dpi = 300, device = "svg")

# Create scatter plot for PREF
pref_plot <- culture_scatter(data, "Px", "Py",
                             point_colour = "#C1121F",
                             plot_title = "Preferred Organisational Culture",
                             margin_pt = 20)
print(pref_plot)
ggsave(filename = file.path(output_dir, "Scatter_Pref.png"), plot = pref_plot, width = 7, height = 7, dpi = 300)
ggsave(filename = file.path(output_dir, "Scatter_Pref.svg"), plot = pref_plot, width = 7, height = 7, dpi = 300, device = "svg")

# Centroid = mean x and mean y over all respondents (missing values ignored),
# computed separately for the NOW and the PREF coordinates.
centroid_now <- tibble(
  X = mean(data$Nx, na.rm = TRUE),
  Y = mean(data$Ny, na.rm = TRUE),
  Category = "NOW"
)

centroid_pref <- tibble(
  X = mean(data$Px, na.rm = TRUE),
  Y = mean(data$Py, na.rm = TRUE),
  Category = "PREF"
)

# Two-row table holding both centroids
centroids <- bind_rows(centroid_now, centroid_pref)

# Long table with one row per respondent and category (NOW / PREF).
# Pivoting the x columns and then the y columns yields every x/y combination
# (four rows per respondent); only the two rows whose x and y come from the
# same category are kept.
coords_renamed <- data %>%
  rename(NowX = Nx, NowY = Ny, PrefX = Px, PrefY = Py)

coords_crossed <- coords_renamed %>%
  pivot_longer(cols = c(NowX, PrefX), names_to = "TypeX", values_to = "X") %>%
  pivot_longer(cols = c(NowY, PrefY), names_to = "TypeY", values_to = "Y")

data_combined <- coords_crossed %>%
  filter(
    (TypeX == "NowX" & TypeY == "NowY") |
      (TypeX == "PrefX" & TypeY == "PrefY")
  ) %>%
  mutate(Category = ifelse(TypeX == "NowX", "NOW", "PREF"))
# Combined culture map: NOW and PREF points in one panel, both centroids as
# diamonds, and an arrow pointing from the NOW centroid to the PREF centroid.

# One colour per category, used for the point outlines and the centroid fill
category_palette <- c("NOW" = "#669BBC", "PREF" = "#C1121F")

# Start (NOW centroid) and end (PREF centroid) of the arrow in a single row
centroid_shift <- centroid_now %>%
  mutate(Xend = centroid_pref$X, Yend = centroid_pref$Y)

combined_plot <- ggplot(data_combined, aes(x = X, y = Y, color = Category)) +
  # Solid diagonals and dashed axes through the origin
  geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "black") +
  geom_abline(slope = -1, intercept = 0, linetype = "solid", color = "black") +
  geom_abline(slope = 0, intercept = 0, linetype = "dashed", color = "black") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  # Culture labels in the four corners
  annotate("text", x = -34, y = 36, label = "Clan", color = "black", size = 5, fontface = "bold", angle = -45) +
  annotate("text", x = 36, y = -34, label = "Market", color = "black", size = 5, fontface = "bold", angle = -45) +
  annotate("text", x = 35, y = 37, label = "Adhocracy", color = "black", size = 5, fontface = "bold", angle = 45) +
  annotate("text", x = -35, y = -33, label = "Hierarchy", color = "black", size = 5, fontface = "bold", angle = 45) +
  # Respondent points, coloured by category
  geom_point(size = 3, alpha = 0.7) +
  # Centroids: black-outlined diamonds filled by category
  geom_point(
    data = centroids,
    aes(x = X, y = Y, fill = Category),
    size = 5, shape = 23, color = "black", stroke = 1.5
  ) +
  # Arrow from the NOW centroid to the PREF centroid
  geom_segment(
    data = centroid_shift,
    aes(x = X, y = Y, xend = Xend, yend = Yend),
    arrow = arrow(length = unit(0.1, "inches"), type = "closed"),
    color = "black", size = 1.00
  ) +
  scale_color_manual(values = category_palette) +
  scale_fill_manual(values = category_palette) +
  theme_minimal() +
  labs(
    title = "Perceived vs Preferred Organisational Culture",
    x = "", y = "", color = "Legend", fill = "Legend"
  ) +
  theme(
    text = element_text(size = 14),
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20),
    panel.grid = element_blank(),  # no grid lines
    legend.position = "bottom"
  ) +
  # Same fixed window as the single-category scatter plots
  lims(x = c(-40, 40), y = c(-40, 40)) +
  coord_fixed(ratio = 1)

print(combined_plot)

# Save as PNG and SVG
ggsave(filename = file.path(output_dir, "Combined_Plot.png"), plot = combined_plot, width = 7, height = 7.48, dpi = 300)
ggsave(filename = file.path(output_dir, "Combined_Plot.svg"), plot = combined_plot, width = 7, height = 7.48, dpi = 300, device = "svg")

### SUBCULTURES
## Polar form of every respondent's NOW and PREF point:
##   *_Distance  Euclidean distance from the origin
##   *_Angle     angle in degrees, measured counter-clockwise from the
##               positive x axis and wrapped into [0, 360)
data <- data %>%
  mutate(
    Now_Distance = sqrt(Nx^2 + Ny^2),
    Pref_Distance = sqrt(Px^2 + Py^2),
    Now_Angle = (atan2(Ny, Nx) * 180 / pi) %% 360,
    Pref_Angle = (atan2(Py, Px) * 180 / pi) %% 360
  )

## Distance descriptives: mean, standard deviation and their ratio
## (SD divided by mean), each ignoring missing values
DistancemeanNOW <- mean(data$Now_Distance, na.rm = TRUE)
DistanceSDNOW <- sd(data$Now_Distance, na.rm = TRUE)
RelativeSDNOW <- DistanceSDNOW / DistancemeanNOW

DistancemeanPREF <- mean(data$Pref_Distance, na.rm = TRUE)
DistanceSDPREF <- sd(data$Pref_Distance, na.rm = TRUE)
RelativeSDPREF <- DistanceSDPREF / DistancemeanPREF

# Strength label of each respondent's NOW / PREF position:
#   ""            distance is missing
#   "(slight)"    relative SD <= distance < mean distance
#   "(dominant)"  everything else
# Missing distances are detected with is.na(); comparing the numeric distance
# with the string "missing" can never be TRUE and left NA in the result.
# NOTE(review): distances below the relative SD (the ones classified as
# BALANCED further down) also receive "(dominant)" here - confirm intended.
data$now_d_s <- ifelse(
  is.na(data$Now_Distance), "",  # If Now_Distance is missing, return an empty string
  ifelse(data$Now_Distance < DistancemeanNOW & data$Now_Distance >= RelativeSDNOW, "(slight)", "(dominant)")
)
data$pref_d_s <- ifelse(
  is.na(data$Pref_Distance), "",  # If Pref_Distance is missing, return an empty string
  ifelse(data$Pref_Distance < DistancemeanPREF & data$Pref_Distance >= RelativeSDPREF, "(slight)", "(dominant)")
)
### SUBCULTURE CLASSIFICATION
# Assign a subculture label to every point given in polar and cartesian form.
#
# x, y                cartesian coordinates of the points
# distance            distance of the points from the origin
# angle               angle in degrees, expected in [0, 360]
# balanced_threshold  points closer to the origin than this are "BALANCED"
#
# Returns a character vector with one label per point:
#   ""          any of x, y, distance or angle is NA
#   "BALANCED"  distance < balanced_threshold
#   otherwise the sector the angle falls into. The four pure cultures own a
#   closed 60-degree arc centred on their axis (ADHOCRACY 0, CLAN 90,
#   HIERARCHY 180, MARKET 270); the four mixed cultures own the open
#   30-degree arcs in between.
# An NA threshold yields NA for every otherwise usable point.
classify_subculture <- function(x, y, distance, angle, balanced_threshold) {
  sectors <- data.frame(
    label = c("ADHOCRACY", "Clan-Adhocracy", "CLAN", "Clan-Hierarchy",
              "HIERARCHY", "Market-Hierarchy", "MARKET", "Market-Adhocracy",
              "ADHOCRACY"),
    lower = c(0, 30, 60, 120, 150, 210, 240, 300, 330),
    upper = c(30, 60, 120, 150, 210, 240, 300, 330, 360),
    closed = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE),
    stringsAsFactors = FALSE
  )

  # Fallback for angles outside every sector. The sectors tile [0, 360]
  # completely, so this is only reached for out-of-range angles.
  sector_label <- ifelse(x == 0 & y == 0, "Origo", "missing")

  # The sectors do not overlap, so the order of assignment does not matter
  for (i in seq_len(nrow(sectors))) {
    inside <- if (sectors$closed[i]) {
      angle >= sectors$lower[i] & angle <= sectors$upper[i]
    } else {
      angle > sectors$lower[i] & angle < sectors$upper[i]
    }
    # which() drops NA comparisons (points with a missing angle)
    sector_label[which(inside)] <- sectors$label[i]
  }

  unusable <- is.na(distance) | is.na(angle) | is.na(x) | is.na(y)
  ifelse(
    unusable, "",
    ifelse(distance < balanced_threshold, "BALANCED", sector_label)
  )
}

###NOW SUBCULTURES
data$NOW_subc <- classify_subculture(
  x = data$Nx,
  y = data$Ny,
  distance = data$Now_Distance,
  angle = data$Now_Angle,
  balanced_threshold = RelativeSDNOW
)

###PREF SUBCULTURES
data$PREF_subc <- classify_subculture(
  x = data$Px,
  y = data$Py,
  distance = data$Pref_Distance,
  angle = data$Pref_Angle,
  balanced_threshold = RelativeSDPREF
)

# NOW and PREF Coxcomb Diagram
# Define fixed order for subcultures (same order for both NOW and PREF)
subc_order <- c("CLAN", "Clan-Adhocracy", "ADHOCRACY", "Market-Adhocracy",
                "MARKET", "Market-Hierarchy", "HIERARCHY", "Clan-Hierarchy", "BALANCED")

### --- NOW Coxcomb Diagram ---
# Wedges are drawn for every subculture except BALANCED, which is reported
# as text in the centre of the diagram instead.
plotted_levels_now <- setdiff(subc_order, "BALANCED")

# Frequency distribution of NOW_subc without BALANCED; percentages are
# relative to the non-BALANCED respondents
now_summary <- data %>%
  filter(NOW_subc != "BALANCED") %>%
  count(NOW_subc, name = "count") %>%
  mutate(
    percentage = (count / sum(count)) * 100,
    NOW_subc = factor(NOW_subc, levels = plotted_levels_now)
  ) %>%
  arrange(NOW_subc)

# Share of BALANCED among all respondents (in percent). Counting with %in%
# yields 0 when nobody is BALANCED, instead of an empty vector that would
# break the axis limits and the label below.
balanced_share_now <- sum(data$NOW_subc %in% "BALANCED") / nrow(data) * 100

# Get actual sample size for NOW (respondents shown as wedges)
n_now <- sum(now_summary$count)

# Outer y-axis limit: largest percentage rounded up to a multiple of 5
y_limit_now <- ceiling(max(now_summary$percentage) / 5) * 5
# Negative lower limit = size of the centre hole, growing with the BALANCED share
inner_radius_now <- -y_limit_now * (balanced_share_now * 10 / 100)

# --- Create BALANCED label ---
balanced_label_now <- paste0("BALANCED\n= ", round(balanced_share_now, 1), "%")

# --- Coxcomb Plot ---
coxcomb_now <- ggplot(now_summary) +
  geom_hline(yintercept = seq(0, y_limit_now, by = 5), color = "lightgrey") +
  geom_col(aes(x = NOW_subc, y = percentage, fill = percentage),
           width = 1, color = "black", show.legend = TRUE, alpha = 0.9) +
  scale_fill_gradientn("Percentage (%)", colours = c("#6C5B7B", "#C06C84", "#F67280", "#F8B195")) +

  # Percentage labels below category names
  geom_text(aes(x = NOW_subc, y = 5, label = paste0(round(percentage, 1), "%")),
            size = 5, color = "black", fontface = "italic", vjust = 1) +

  # BALANCED text in the centre hole. annotate() draws it exactly once;
  # geom_text() with constant aesthetics would draw one copy per data row.
  annotate("text", x = 1, y = inner_radius_now / 2, label = balanced_label_now,
           size = 3, color = "gray30",
           hjust = 0.7, vjust = 1.3) +

  labs(title = "Distribution of Subcultures as Perceived",
       subtitle = "Percentages of NOW Subcultures as a Coxcomb Diagram",
       caption = paste("Sample size: n=", n_now)) +
  coord_polar() +

  scale_y_continuous(limits = c(inner_radius_now, y_limit_now), expand = c(0, 0), breaks = seq(0, y_limit_now, by = 5)) +

  theme_minimal() +
  theme(legend.position = c(-0.1, 0.1),
        legend.direction = "vertical",
        legend.box.margin = margin(t = 10, b = 10, l = 20, r = -20),
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_text(color = "gray12", size = 12),
        panel.grid = element_blank(),
        text = element_text(color = "gray12", family = "Arial"),
        plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
        plot.subtitle = element_text(size = 14, hjust = 0.5),
        plot.caption = element_text(size = 10, hjust = 0.5),
        plot.margin = margin(t = 10, r = 50, b = 10, l = 50, unit = "pt"))

# --- Save and print the plot ---
print(coxcomb_now)
# Both files go to output_dir, like every other plot of this script
ggsave(filename = file.path(output_dir, "NOW_subc_Coxcomb.png"), plot = coxcomb_now, width = 10, height = 10, units = "in", dpi = 300)
ggsave(
  filename = file.path(output_dir, "NOW_subc_Coxcomb.jpg"),
  plot = coxcomb_now,
  width = 15,
  height = 10,
  units = "in",
  dpi = 300
)

### --- PREF Coxcomb Diagram ---
# Wedges are drawn for every subculture except BALANCED, which is reported
# as text in the centre of the diagram instead.
plotted_levels_pref <- setdiff(subc_order, "BALANCED")

# Frequency distribution of PREF_subc without BALANCED; percentages are
# relative to the non-BALANCED respondents
pref_summary <- data %>%
  filter(PREF_subc != "BALANCED") %>%
  count(PREF_subc, name = "count") %>%
  mutate(
    percentage = (count / sum(count)) * 100,
    PREF_subc = factor(PREF_subc, levels = plotted_levels_pref)
  ) %>%
  arrange(PREF_subc)

# Share of BALANCED among all respondents (in percent). Counting with %in%
# yields 0 when nobody is BALANCED, instead of an empty vector that would
# break the label below.
balanced_share_pref <- sum(data$PREF_subc %in% "BALANCED") / nrow(data) * 100

# Get actual sample size for PREF (respondents shown as wedges)
n_pref <- sum(pref_summary$count)

# Outer y-axis limit: largest percentage rounded up to a multiple of 5
y_limit_pref <- ceiling(max(pref_summary$percentage) / 5) * 5
# Only used to position the BALANCED label; the hole itself is fixed below.
# NOTE(review): the NOW diagram multiplies the share by 10 here and uses the
# result as the lower axis limit, whereas this diagram uses a fixed hole of
# 20 % of y_limit_pref - confirm which geometry is intended for both plots.
inner_radius_pref <- -y_limit_pref * (balanced_share_pref / 100)

# --- Create BALANCED label ---
balanced_label_pref <- paste0("BALANCED\n= ", round(balanced_share_pref, 1), "%")

# Create PREF Coxcomb Diagram
coxcomb_pref <- ggplot(pref_summary) +
  geom_hline(yintercept = seq(0, y_limit_pref, by = 5), color = "lightgrey") +
  geom_col(aes(x = PREF_subc, y = percentage, fill = percentage),
           width = 1, color = "black", show.legend = TRUE, alpha = 0.9) +
  scale_fill_gradientn("Percentage (%)", colours = c("#6C5B7B", "#C06C84", "#F67280", "#F8B195")) +

  # Place percentage labels BELOW the category names
  geom_text(aes(x = PREF_subc, y = 5, label = paste0(round(percentage, 1), "%")),
            size = 5, color = "black", fontface = "italic", vjust = 1) +

  # BALANCED text in the centre hole. annotate() draws it exactly once;
  # geom_text() with constant aesthetics would draw one copy per data row.
  annotate("text", x = 1, y = inner_radius_pref / 2, label = balanced_label_pref,
           size = 3, color = "gray30",
           hjust = 0.7, vjust = 1.6) +

  labs(title = "Distribution of Subcultures as Preferred",
       subtitle = "Percentages of PREF Subcultures as a Coxcomb Diagram",
       caption = paste("Sample size: n=", n_pref)) +
  coord_polar() +

  # Extend y-axis limits downward (fixed 20 % of the outer limit) to open the
  # centre hole
  scale_y_continuous(limits = c(-y_limit_pref * 0.2, y_limit_pref), expand = c(0, 0), breaks = seq(0, y_limit_pref, by = 5)) +

  theme_minimal() +
  theme(legend.position = c(-0.1, 0.1),
        legend.direction = "vertical",
        legend.box.margin = margin(t = 10, b = 10, l = 20, r = -20),
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_text(color = "gray12", size = 12),
        panel.grid = element_blank(),
        text = element_text(color = "gray12", family = "Arial"),
        plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
        plot.subtitle = element_text(size = 14, hjust = 0.5),
        plot.caption = element_text(size = 10, hjust = 0.5),
        plot.margin = margin(t = 10, r = 50, b = 10, l = 50, unit = "pt"))

# Display and save PREF Coxcomb Diagram
print(coxcomb_pref)
# Both files go to output_dir, like every other plot of this script
ggsave(filename = file.path(output_dir, "PREF_subc_Coxcomb.png"), plot = coxcomb_pref, width = 10, height = 10, units = "in", dpi = 300)
ggsave(
  filename = file.path(output_dir, "PREF_subc_Coxcomb.jpg"),
  plot = coxcomb_pref,
  width = 15,
  height = 10,
  units = "in",
  dpi = 300
)

